import os
import magic
from config import *
from update import *
from filter import *
from parser import *
from smartsig import *
from extractor import *
from prettyprint import *
from common import file_size

class Binwalk:
	'''
	Primary Binwalk class. Loads signatures into libmagic and scans a target file for them.

	Interesting class objects:

		self.filter        - An instance of the MagicFilter class.
		self.extractor     - An instance of the Extractor class.
		self.parser        - An instance of the MagicParser class.
		self.smart         - An instance of the SmartSignature class (created with self.filter).
		self.display       - An instance of the PrettyPrint class.
		self.config        - An instance of the Config class, used to look up magic file paths.
		self.magic_files   - A list of magic file path strings to use whenever the scan() method is invoked.
		self.magic         - The libmagic handle returned by magic.open(); None until load_signatures() is called.
		self.mfile         - The value returned by self.parser.parse(); None until load_signatures() is called.
		self.scan_length   - The total number of bytes to be scanned.
		self.total_scanned - The number of bytes that have already been scanned.
		self.total_read    - The number of bytes accounted for by _read_block() during the current scan.
	'''

	# Default libmagic flags. Basically disable anything we don't need in the name of speed.
	# These are always OR'd with the flags passed to the constructor.
	DEFAULT_FLAGS = magic.MAGIC_NO_CHECK_TEXT | magic.MAGIC_NO_CHECK_ENCODING | magic.MAGIC_NO_CHECK_APPTYPE | magic.MAGIC_NO_CHECK_TOKENS

	# The MAX_SIGNATURE_SIZE limits the amount of data available to a signature.
	# While most headers/signatures are far smaller than this value, some may reference
	# pointers in the header structure which may point well beyond the header itself.
	# Passing the entire remaining buffer to libmagic is resource intensive and will
	# significantly slow the scan; this value represents a reasonable buffer size to
	# pass to libmagic which will not drastically affect scan time.
	# NOTE(review): 8092 is not a power of two; 8192 may have been intended - confirm before changing.
	MAX_SIGNATURE_SIZE = 8092

	# Max number of bytes to process at one time. Everyone should have 50MB of memory, right?
	# _read_block() actually reads READ_BLOCK_SIZE + MAX_SIGNATURE_SIZE bytes per call so that
	# a signature starting near the end of a block still has trailing data available.
	READ_BLOCK_SIZE = 50 * 1024 * 1024

	# Minimum verbosity level at which to enable extractor verbosity.
	VERY_VERBOSE = 2

	# Scan every byte by default.
	DEFAULT_BYTE_ALIGNMENT = 1

	def __init__(self, magic_files=[], flags=magic.MAGIC_NONE, log=None, quiet=False, verbose=0):
		'''
		Class constructor.

		@magic_files - A list of magic files to use. If empty or None, the user and system
		               default magic files (looked up through Config) are used instead.
		@flags       - Extra flags for magic_open; they are OR'd with DEFAULT_FLAGS.
		               [TODO: Might this be more appropriate as an argument to load_signatures?]
		@log         - Output PrettyPrint data to log file as well as to stdout.
		@quiet       - If set to True, suppress PrettyPrint output to stdout.
		@verbose     - Verbosity level.

		Returns None.
		'''
		self.flags = self.DEFAULT_FLAGS | flags
		self.magic_files = magic_files
		self.verbose = verbose

		# Scan bookkeeping; scan() resets these counters at the start of every scan.
		self.total_scanned = 0
		self.scan_length = 0
		self.total_read = 0

		# libmagic is initialized lazily by load_signatures().
		self.magic = None
		self.mfile = None

		# The config class gives us access to file/directory paths.
		self.config = Config()

		# Fall back to the default magic files when the caller supplied none.
		if not self.magic_files:
			magic_name = self.config.BINWALK_MAGIC_FILE
			# The user's magic file goes first so that those signatures take precedence.
			self.magic_files = [
					self.config.paths['user'][magic_name],
					self.config.paths['system'][magic_name],
			]

		# The extractor is only verbose when we were told to be very verbose.
		extractor_verbose = bool(self.verbose >= self.VERY_VERBOSE)

		# PrettyPrint instance, which can be used to print results to screen/file.
		self.display = PrettyPrint(log=log, quiet=quiet, verbose=verbose, bwalk=self)

		# MagicFilter and Extractor instances. These can be used to:
		#
		#	o Create include/exclude filters
		#	o Specify file extraction rules to be applied during a scan
		#
		self.filter = MagicFilter()
		self.extractor = Extractor(verbose=extractor_verbose)

		# SmartSignature and MagicParser instances. These are mostly for internal use.
		self.smart = SmartSignature(self.filter)
		self.parser = MagicParser(self.filter, self.smart)

	def __del__(self):
		'''
		Class destructor.

		Hands off to self.cleanup() when the instance is being destroyed.
		'''
		self.cleanup()

	def cleanup(self):
		'''
		Cleanup any temporary files generated by the internal instance of MagicParser.

		This is a best-effort operation: it is also invoked from __del__, where self.parser
		may not exist (e.g. if the constructor failed part way through), so ordinary errors
		are deliberately ignored.

		Returns None.
		'''
		try:
			self.parser.cleanup()
		except Exception:
			# Only ordinary errors are swallowed; KeyboardInterrupt and SystemExit are not
			# subclasses of Exception and therefore still propagate to the caller.
			pass

	def load_signatures(self, magic_files=[], pre_filter_signatures=True, filter_short_signatures=True):
		'''
		Load signatures from magic file(s) and initialize libmagic with them.
		Called automatically by Binwalk.scan() with all defaults, if not already called manually.

		@magic_files			- A list of magic files to use (default: self.magic_files).
		@pre_filter_signatures 		- Set to False to disable pre-filtering of signatures before invoking libmagic.
		@filter_short_signatures	- Passed through unchanged to MagicParser.parse().
						  NOTE(review): the name suggests that True filters OUT signatures with
						  short (<= 2 byte) magic strings - confirm against MagicParser.parse.

		Returns None.
		'''
		# Record the pre-filter setting in the smart signature class instance.
		# Binwalk.scan() checks this flag before performing pre-filtering.
		self.smart.pre_filter = pre_filter_signatures

		# Magic files given here replace any that were already set.
		if magic_files:
			self.magic_files = magic_files

		# Parse the magic file(s)...
		self.mfile = self.parser.parse(
				self.magic_files,
				filter_short_signatures=filter_short_signatures,
				pre_filter_signatures=pre_filter_signatures)

		# ...then open libmagic and load the parsed result into it.
		self.magic = magic.open(self.flags)
		self.magic.load(self.mfile)

	def scan(self, target_file, offset=0, length=0, align=DEFAULT_BYTE_ALIGNMENT, show_invalid_results=False, callback=None):
		'''
		Performs a Binwalk scan on the target file.

		@target_file 			- File to scan.
		@offset      			- Starting offset at which to start the scan.
		@length      			- Number of bytes to scan. If 0, scan from offset to the end of the file.
		@align       			- Look for signatures every align bytes. Values <= 0 are treated as 1.
		@show_invalid_results		- Set to True to display invalid results.
		@callback    			- Callback function to be invoked when matches are found.

		The callback function is passed two arguments, in this order: the offset at which the results were
		identified, and a list of result dictionaries containing the scan results (one result per dict).
		Example callback function:

			def my_callback(offset, results):
				print "Found %d results at offset %d:" % (len(results), offset)
				for result in results:
					print "\t%s" % result['description']

			binwalk.Binwalk().scan("firmware.bin", callback=my_callback)

		Upon completion, the scan method returns a sorted list of tuples containing the offsets at which
		results were identified and the list of results dictionaries found at each offset:

			scan_items = [
					(0, [{description : "LZMA compressed data..."}]),
					(112, [{description : "gzip compressed data..."}])
			]

		If delayed extraction is enabled on self.extractor, the sorted list is passed through
		self.extractor.delayed_extract() and its return value is returned instead.

		The target file is opened in binary mode and is always closed before this method returns,
		even if an exception is raised during the scan.

		See SmartSignature.parse for a more detailed description of the results dictionary structure.
		'''
		scan_results = {}
		self.total_read = 0
		self.total_scanned = 0
		self.scan_length = length
		self.filter.show_invalid_results = show_invalid_results

		# Load the default signatures if self.load_signatures has not already been invoked
		if self.magic is None:
			self.load_signatures()

		# Get a local copy of the signature sets generated by self.parser.build_signature_set.
		# This is accessed heavily throughout the scan, and there is less overhead for accessing local variables in Python.
		signature_set = self.parser.build_signature_set()

		# Need the total size of the target file, even if we aren't scanning the whole thing
		fsize = file_size(target_file)

		# Open the target file as raw bytes; the try/finally guarantees the handle is released.
		fd = open(target_file, 'rb')
		try:
			# Seek to the specified start offset
			fd.seek(offset)

			# If no length was specified, make the length the size of the target file minus the starting offset
			if self.scan_length == 0:
				self.scan_length = fsize - offset
			# Sanity check on the byte alignment; default to 1
			if align <= 0:
				align = 1

			# Main loop, scan through all the data
			while True:
				# Read in the next block of data from the target file and make sure it's valid
				(data, dlen) = self._read_block(fd)
				if data is None or dlen == 0:
					break

				# The total number of bytes scanned could be bigger than the total number
				# of bytes read from the file under the following circumstances:
				#
				#	o The previous dlen was not a multiple of align
				#	o A previous result specified a jump offset that was beyond the end of the
				#	  then current data block
				#
				# If this is the case, we need to index into this data block appropriately in order to
				# resume the scan from the appropriate offset, and adjust dlen accordingly.
				bufindex = self.total_scanned - self.total_read
				if bufindex > 0:
					# If the total_scanned > total_read, then the total_scanned offset is in a subsequent block.
					# Set i to bufindex, which will cause i to be greater than dlen and this block will be skipped.
					i = bufindex
				elif bufindex < 0:
					# If the total_scanned offset is less than total_read, then the total_scanned offset is
					# somewhere inside this block. Set i to index into the block appropriately.
					i = dlen + bufindex
				else:
					# If the total_scanned offset ends at the end of this block, don't scan any of this block
					i = dlen

				# Scan through each block of data looking for signatures
				while i < dlen:
					results = []
					results_offset = -1
					pre_filter_ok = False
					smart_jump_done = False

					# Pre-filter data by checking to see if the parser thinks this might be a valid match.
					# This eliminates unnecessary calls into libmagic, which are very expensive.
					#
					# Ideally, this should be done in the MagicParser class, but function calls are expensive.
					# Doing it here greatly decreases the scan time.
					if self.smart.pre_filter:
						for (sig_offset, sigset) in signature_set:
							if data[i+sig_offset:i+sig_offset+self.parser.MATCH_INDEX_SIZE] in sigset:
								pre_filter_ok = True
								break
					else:
						pre_filter_ok = True

					if pre_filter_ok:
						# Pass the data to libmagic, and split out multiple results into a list
						for magic_result in self.parser.split(self.magic.buffer(data[i:i+self.MAX_SIGNATURE_SIZE])):

							# Some file names are not NULL byte terminated, but rather their length is
							# specified in a size field. To ensure these are not marked as invalid due to
							# non-printable characters existing in the file name, parse the filename(s) and
							# trim them to the specified filename length, if one was specified.
							magic_result = self.smart._parse_raw_strings(magic_result)

							# Make sure this is a valid result before further processing
							if not self.filter.invalid(magic_result):
								# The smart filter parser returns a dictionary of keyword values and the signature description.
								smart = self.smart.parse(magic_result)

								# Validate the jump value and check if the response description should be displayed
								if smart['jump'] > -1 and self._should_display(smart['description']):
									# If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
									# the calculated results offset will be wrong since i will have been incremented. Only set the
									# results_offset value when the first match is encountered.
									if results_offset < 0:
										results_offset = offset + smart['adjust'] + self.total_scanned

									# Double check to make sure the smart['adjust'] value is sane.
									# If it makes results_offset negative, then it is not sane.
									if results_offset >= 0:
										# Extract the result, if it matches one of the extract rules and is not a delayed extract.
										if self.extractor.enabled and not (self.extractor.delayed and smart['delay']):
											# If the signature did not specify a size, extract to the end of the file.
											if smart['size'] == 0:
												smart['size'] = fsize-results_offset

											smart['extract'] = self.extractor.extract(
													results_offset,
													smart['description'],
													target_file,
													smart['size'],
													name=smart['name'])

										# This appears to be a valid result, so append it to the results list.
										results.append(smart)

								# Jump to the offset specified by jump. Only do this once, so that if multiple results
								# are returned each of which specify a jump offset, only the first will be honored.
								if smart['jump'] > 0 and not smart_jump_done:
									# Once a jump offset has been honored, we need to start scanning every byte since the
									# jump offset may have thrown off the original alignment. In terms of speed this is fine,
									# since the jump offset usually saves more time anyway. If this is not what the user
									# wanted/intended, disabling pre filtering will disable jump offset processing completely.
									align = self.DEFAULT_BYTE_ALIGNMENT
									smart_jump_done = True
									# align is subtracted here because it is added back unconditionally below.
									i += (smart['jump'] - align)
									self.total_scanned += (smart['jump'] - align)

					# Did we find any valid results?
					if results_offset >= 0:
						scan_results[results_offset] = results

						if callback is not None:
							callback(results_offset, results)

					# Track the number of bytes scanned in this block, and the total number of bytes scanned.
					i += align
					self.total_scanned += align
		finally:
			fd.close()

		# Sort the results (by offset) before returning them
		scan_items = sorted(scan_results.items())

		# Do delayed extraction, if specified.
		if self.extractor.enabled and self.extractor.delayed:
			scan_items = self.extractor.delayed_extract(scan_items, target_file, fsize)

		return scan_items

	def _should_display(self, data):
		'''
		Determines if a result string should be displayed to the user or not.

		@data - Display string.

		Returns a true value if the string should be displayed.
		Returns a false value if the string should not be displayed. Note that an empty or
		None display string is returned as-is rather than being converted to False.
		'''
		# Nothing to display; hand back the falsy value itself.
		if not data:
			return data

		# Strings flagged as invalid by the filter are never displayed.
		if self.filter.invalid(data):
			return False

		# Otherwise display the string unless the filter explicitly excludes it.
		return self.filter.filter(data) != self.filter.FILTER_EXCLUDE

	def _read_block(self, fd):
		'''
		Reads in a block of data from the target file, starting at fd's current position.

		@fd - File object for the target file, already positioned at the next byte to be scanned.

		Returns a tuple of (file block data, block data length).

		The returned data may be longer than the returned block data length: up to MAX_SIGNATURE_SIZE
		extra bytes are kept at the end so that a signature found near the end of the block still has
		trailing data available. If the scan length has already been read, (None, 0) is returned; if
		the file has no more data, the empty read result and a length of 0 are returned.

		On return, fd is positioned immediately after the block data length bytes that were counted,
		so the next call resumes where this block ended.
		'''
		dlen = 0
		data = None
		# Read in READ_BLOCK_SIZE plus MAX_SIGNATURE_SIZE bytes, but return a max dlen value
		# of READ_BLOCK_SIZE. This ensures that there is a MAX_SIGNATURE_SIZE buffer at the
		# end of the returned data in case a signature is found at or near data[dlen].
		rlen = self.READ_BLOCK_SIZE + self.MAX_SIGNATURE_SIZE

		if self.total_read < self.scan_length:
			# Remember where this block starts. self.total_read counts bytes from the scan's start
			# offset, not from the beginning of the file, so it cannot be used as an absolute
			# file position when the scan was started at a non-zero offset.
			block_start = fd.tell()

			data = fd.read(rlen)

			if data:
				# Get the actual length of the read in data
				dlen = len(data)

				# If we've read in more data than the scan length, truncate the dlen value
				if (self.total_read + dlen) >= self.scan_length:
					dlen = self.scan_length - self.total_read
				# If dlen is the expected rlen size, it should be set to READ_BLOCK_SIZE
				elif dlen == rlen:
					dlen = self.READ_BLOCK_SIZE

				# Increment self.total_read to reflect the amount of data that has been read
				# for processing (actual read size is larger of course, due to the MAX_SIGNATURE_SIZE
				# buffer of data at the end of each block).
				self.total_read += dlen

				# Step back over the trailing buffer so the next read picks up where this block left off.
				fd.seek(block_start + dlen)

		return (data, dlen)

